{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Sequential(\n  (0): Linear(in_features=4, out_features=3, bias=True)\n  (1): ReLU()\n  (2): Linear(in_features=3, out_features=1, bias=True)\n)\n"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn  \n",
    "\n",
    "net = nn.Sequential(nn.Linear(4,3),nn.ReLU(),nn.Linear(3,1))\n",
    "\n",
    "print(net)\n",
    "x = torch.rand(2,4)\n",
    "y = net(x).sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 访问模型参数\n",
    "\n",
    "可以使用`Module`类的`parameters()`和`named_parameters()`方法来访问所有的参数（以迭代器的形式返回）。`named_parameters`会返回参数的名称."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "<class 'generator'>\n0.weight torch.Size([3, 4])\n0.bias torch.Size([3])\n2.weight torch.Size([1, 3])\n2.bias torch.Size([1])\n"
    }
   ],
   "source": [
    "print(type(net.named_parameters()))\n",
    "\n",
    "for name,param in net.named_parameters():\n",
    "    print(name,param.size())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从上面可知`named_parameters()`返回的是迭代器。　使用`Sequential`构建的网络返回的name为层的编号，还可以使用`[]`来访问网络的任一层"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>\nbias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>\n"
    }
   ],
   "source": [
    "for name,param in net[0].named_parameters():\n",
    "    print(name,param.size(),type(param))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`param`的类型为`torch.nn.parameter.Parameter`，这是一个`Tensor`的子类，和`Tensor`不同的是，如果一个`Tensor`是`Parameter`，它会自动的被添加到模型参数列表里。\n",
    "\n",
    "`Parameter`拥有`Tensor`的所有属性，可以使用`data`来访问数据，使用`grad`访问梯度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "weight1\n"
    }
   ],
   "source": [
    "class MyModel(nn.Module):\n",
    "    def __init__(self,**kwargs):\n",
    "        super(MyModel,self).__init__(**kwargs)\n",
    "        self.weight1 = nn.Parameter(torch.rand(20,20))\n",
    "        self.weight2 = torch.rand(20,20)\n",
    "\n",
    "    def forward(self,x):\n",
    "        pass\n",
    "\n",
    "n = MyModel()\n",
    "for name,param in n.named_parameters():\n",
    "    print(name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "tensor([[-0.3461,  0.3694,  0.1778, -0.3958],\n        [-0.0034, -0.2368,  0.2241, -0.2926],\n        [ 0.3248, -0.2238, -0.2350,  0.2400]])\nNone\n"
    }
   ],
   "source": [
    "weight_0 = list(net[0].parameters())[0]\n",
    "print(weight_0.data)\n",
    "print(weight_0.grad)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "0.weight tensor([[ 0.0031, -0.0003, -0.0184,  0.0004],\n        [ 0.0011, -0.0036,  0.0086,  0.0271],\n        [-0.0099, -0.0080,  0.0083,  0.0156]])\n0.bias tensor([0., 0., 0.])\n2.weight tensor([[-0.0035,  0.0088, -0.0207]])\n2.bias tensor([0.])\n"
    }
   ],
   "source": [
    "# 分别对权重和bias使用不同的初始化方法\n",
    "for name,param in net.named_parameters():\n",
    "    if 'weight' in name:\n",
    "        nn.init.normal_(param,mean=0,std=0.01)\n",
    "        print(name,param.data)\n",
    "\n",
    "    elif 'bias' in name:\n",
    "        nn.init.constant_(param,val=0)\n",
    "        print(name,param.data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "0.weight Parameter containing:\ntensor([[ 0.0000, -0.0000,  5.9529, -0.0000],\n        [-9.6475, -0.0000, -0.0000,  7.9089],\n        [-9.5902, -7.5929,  0.0000, -5.8976]], requires_grad=True)\n2.weight Parameter containing:\ntensor([[ 9.7117, -6.2379, -7.2058]], requires_grad=True)\n"
    }
   ],
   "source": [
    "# 自定义初始化方法 \n",
    "\n",
    "def normal_(tensor,mean=0,std=0.01):\n",
    "    with torch.no_grad(): # 不记录梯度，改变tensor的值\n",
    "        return tensor.normal_(mean,std)\n",
    "\n",
    "def init_weight(tensor):\n",
    "    with torch.no_grad():\n",
    "        tensor.uniform_(-10,10)\n",
    "        tensor *= (tensor.abs() >= 5).float()\n",
    "\n",
    "for name,param in net.named_parameters():\n",
    "    if 'weigh' in name:\n",
    "        init_weight(param)\n",
    "        print(name,param)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 共享模型参数\n",
    "在多个层之间共享模型参数，有两种方法：\n",
    "- `forward`函数多次调用同一个层\n",
    "- 传入`Sequential`的模块市同一个`Module`实例的话参数也是共享的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "Sequential(\n  (0): Linear(in_features=1, out_features=1, bias=False)\n  (1): Linear(in_features=1, out_features=1, bias=False)\n)\n0.weight tensor([[3.]])\ntensor([[9.]], grad_fn=<MmBackward>)\ntensor([[6.]])\n"
    }
   ],
   "source": [
    "linear = nn.Linear(1,1,bias=False)\n",
    "net= nn.Sequential(linear,linear)\n",
    "print(net)\n",
    "for name,param in net.named_parameters():\n",
    "    nn.init.constant_(param,val=3)\n",
    "    print(name,param.data)\n",
    "\n",
    "x = torch.ones(1,1)\n",
    "y = net(x)\n",
    "print(y)\n",
    "y.backward()\n",
    "print(net[0].weight.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## GPU计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "True\n1\n0\nGeForce GTX 1060 6GB\n(6, 1)\n"
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn \n",
    "\n",
    "print(torch.cuda.is_available())\n",
    "print(torch.cuda.device_count())\n",
    "print(torch.cuda.current_device())\n",
    "print(torch.cuda.get_device_name(0))\n",
    "print(torch.cuda.get_device_capability(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "tensor([1, 2, 3], device='cuda:0')\ntensor([[0.6596, 0.8143, 0.7888, 0.4173],\n        [0.0165, 0.9277, 0.0844, 0.9370],\n        [0.6718, 0.2953, 0.8057, 0.5329]], device='cuda:0')\n"
    }
   ],
   "source": [
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "x = torch.tensor([1,2,3],device=device)\n",
    "\n",
    "y = torch.rand(3,4)\n",
    "y = y.to(device)\n",
    "\n",
    "print(x)\n",
    "print(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "device(type='cpu')"
     },
     "metadata": {},
     "execution_count": 18
    }
   ],
   "source": [
    "net = nn.Linear(3,1)\n",
    "list(net.parameters())[0].device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "device(type='cuda', index=0)"
     },
     "metadata": {},
     "execution_count": 19
    }
   ],
   "source": [
    "net.cuda()\n",
    "list(net.parameters())[0].device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": "tensor([[0.9744, 0.5742, 0.1584, 0.6088, 0.7109],\n        [0.7951, 0.6606, 0.1587, 0.3705, 0.0154],\n        [0.4641, 0.8261, 0.0737, 0.6368, 0.7628],\n        [0.9285, 0.8652, 0.8189, 0.4539, 0.2267]])\ntensor([[0.9744, 0.5742, 0.1584, 0.6088, 0.7109],\n        [0.7951, 0.6606, 0.1587, 0.3705, 0.0154],\n        [0.4641, 0.8261, 0.0737, 0.6368, 0.7628],\n        [0.9285, 0.8652, 0.8189, 0.4539, 0.2267]], device='cuda:0')\n"
    }
   ],
   "source": [
    "z = torch.rand(4,5)\n",
    "z.cuda()\n",
    "print(z)\n",
    "z = z.cuda()\n",
    "print(z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "python37764bitpytorchnotebookconda6e7a8693df0d4d92aca09d521275d23a",
   "display_name": "Python 3.7.7 64-bit ('pytorch_notebook': conda)"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}